package com.six.compress.old;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.orc.CompressionKind;
import org.apache.orc.OrcFile;
import org.apache.orc.TypeDescription;
import org.apache.orc.Writer;
import org.apache.orc.storage.ql.exec.vector.BytesColumnVector;
import org.apache.orc.storage.ql.exec.vector.VectorizedRowBatch;

import java.nio.charset.StandardCharsets;
import java.util.Random;

@SuppressWarnings("all")
public class ORCWriterStringTest {

    /** Number of generated parameter columns (p1 .. p15000). */
    private static final int PARAM_COLUMN_COUNT = 15000;
    /** Total number of rows written to the output file. */
    private static final int ROW_COUNT = 1000;
    /** Row-batch capacity; batches are flushed when full. */
    private static final int BATCH_SIZE = 1000;

    /**
     * Writes an LZ4-compressed ORC file containing {@value #ROW_COUNT} rows of an
     * all-string schema ("channel", "time", p1..p15000) filled with random float
     * strings, then prints the elapsed wall-clock time in milliseconds.
     *
     * @param args unused
     * @throws Exception never in practice — write errors are caught and printed
     */
    public static void main(String[] args) throws Exception {
        // Define the ORC schema, i.e. the table structure.
        TypeDescription schema = TypeDescription.createStruct();
        schema.addField("channel", TypeDescription.createString());
        schema.addField("time", TypeDescription.createString());
        for (int i = 1; i <= PARAM_COLUMN_COUNT; i++) {
            schema.addField("p" + i, TypeDescription.createString());
        }

        try {
            // Absolute local path of the output ORC file.
            String lxw_orc1_file = "/home/hdfs/data/fly_param_string_" + System.currentTimeMillis() + ".orc";
            Configuration conf = new Configuration();
            FileSystem.getLocal(conf); // initialize the local filesystem implementation

            long time = System.currentTimeMillis();
            Random random = new Random(10000); // fixed seed => reproducible column data

            // try-with-resources guarantees the writer is closed even if a write
            // fails mid-loop (the original leaked the writer on exception).
            try (Writer writer = OrcFile.createWriter(
                    new Path(lxw_orc1_file),
                    OrcFile.writerOptions(conf)
                            .setSchema(schema)
                            .compress(CompressionKind.LZ4)
                            .version(OrcFile.Version.V_0_12))) {

                VectorizedRowBatch batch = schema.createRowBatch(BATCH_SIZE);
                for (int i = 0; i < ROW_COUNT; i++) {
                    System.out.println("aaaaaaaaaaaaaaaaa=>" + i);
                    int row = batch.size++;
                    // Explicit charset: getBytes() without one is platform-dependent.
                    ((BytesColumnVector) batch.cols[0])
                            .setVal(row, "a".getBytes(StandardCharsets.UTF_8));
                    // NOTE(review): every row gets the same constant (time + 1);
                    // possibly (time + i) was intended — kept as-is to preserve behavior.
                    ((BytesColumnVector) batch.cols[1])
                            .setVal(row, ("" + (time + 1)).getBytes(StandardCharsets.UTF_8));
                    for (int j = 2; j < PARAM_COLUMN_COUNT + 2; j++) {
                        ((BytesColumnVector) batch.cols[j])
                                .setVal(row, (random.nextFloat() + "").getBytes(StandardCharsets.UTF_8));
                    }
                    // Flush when the batch reaches its allocated capacity (the original
                    // allocated 1000 rows but flushed at a hard-coded 100, wasting 90%
                    // of the column-vector capacity).
                    if (batch.size == batch.getMaxSize()) {
                        writer.addRowBatch(batch);
                        batch.reset();
                        System.out.println("wwwwww");
                    }
                }
                // Flush any remaining rows; skip the redundant empty-batch add.
                if (batch.size > 0) {
                    writer.addRowBatch(batch);
                }
            }
            long e = System.currentTimeMillis();
            System.out.println("cost=>" + (e - time));
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

}
